pacman::p_load(readr,dplyr,readxl,stringr,tidyr,stringi )
rm(list=ls())
################################################################################


############################################ Brazil 

####### Landuse
final_cols <- c('year','county_id',
                'crops_permanent','crops_seasonal',
                'pasture_natural','pasture_planted',
                'forest_natural', 'forest_planted', 'other')

#1995
df <- read_excel("../../data/landuse/raw/tabela317.xlsx", sheet = 1,skip = 4, .name_repair = "unique_quiet")
df <- df[-nrow(df),][,-c(2, 3)]
names(df)[1] <- "county_id"
df$year <- '1995'
df <- df %>% dplyr::select(year, everything())
colnames(df) <- c('year', 'county_id', 'total','crops_permanent', 'crops_seasonal', 'crops_seasonal_fallow',
                  'pasture_natural', 'pasture_planted','forest_natural', 'forest_planted','not_used', 'not_usable')
df[df == "X" | df == "-"] <- "0"
df <- df %>% mutate(across(where(is.character), as.numeric))
df$crops_seasonal <- df$crops_seasonal + df$crops_seasonal_fallow
df$other <- df$not_used + df$not_usable
df_1 <- df %>% dplyr::select(all_of(final_cols))

#2006
df <- read_excel("../../data/landuse/raw/tabela1011.xlsx", sheet = 2,skip = 4, .name_repair = "unique_quiet")
df <- df[-nrow(df), ]
df <- df[, -c(2, 3)]
names(df)[1] <- "county_id"
df$year <- '2006'
df <- df %>% dplyr::select(year, everything())
colnames(df) <- c('year','county_id','total','crops_permanent','crops_seasonal','crops_forage','floriculture',
                  'pasture_natural','pasture_planted_degraded','pasture_planted',
                  'forest_natural_protected', 'forest_natural_unprotected', 'forest_planted','agroforestry',
                  'aquaculture','construction','degraded_land','not_usable')
df[] <- lapply(df, as.character)
df[df == "X" | df == "-"] <- "0"
df[] <- lapply(df, as.numeric)
df$crops_seasonal <- df$crops_seasonal + df$crops_forage + df$floriculture
df$crops_permanent <- df$crops_permanent + df$agroforestry
df$pasture_planted <- df$pasture_planted_degraded + df$pasture_planted
df$forest_natural <- df$forest_natural_protected + df$forest_natural_unprotected
df$other <- df$aquaculture + df$construction + df$degraded_land + df$not_usable
df_2 <- df %>% dplyr::select(all_of(final_cols))

#2017
df <- read_excel("../../data/landuse/raw/tabela6722.xlsx", sheet = 2,skip = 6, .name_repair = "unique_quiet")
df <- df[-nrow(df), ]
df <- df[, -c(2, 3, 4)]
names(df)[1] <- "county_id"
df$year <- '2017'
df <- df %>% dplyr::select(year, everything())
colnames(df) <- c('year','county_id','total','crops_permanent', 'crops_seasonal','crops_flowers',
                  'pasture_natural','pasture_planted','pasture_planted_degraded',
                  'forest_natural_protected', 'forest_natural_unprotected', 'forest_planted','agroforestry','other_uses')
df[] <- lapply(df, as.character)
df[df == "X" | df == "-"] <- "0"
df[] <- lapply(df, as.numeric)
df$crops_seasonal <- df$crops_seasonal + df$crops_flowers
df$crops_permanent <- df$crops_permanent + df$agroforestry
df$pasture_planted <- df$pasture_planted + df$pasture_planted_degraded
df$forest_natural <- df$forest_natural_protected + df$forest_natural_unprotected
df$other <- df$other_uses
df_3 <- df %>% dplyr::select(all_of(final_cols))

#Bind
df_landuse <- bind_rows(df_1, df_2, df_3)
df_landuse$county_id <- paste0("BR", as.character(df_landuse$county_id))



####### Cropland
final_cols <- c('year','county_id','soybean','maize','sugarcane','wheat','rice','cotton','oats','rye','barley','sorghum','sunflower')

#1995
df <- read_excel("../../data/landuse/raw/tabela501.xlsx", sheet = 1, .name_repair = "unique_quiet")
df <- df[-nrow(df), ]
df <- df[-(1:3), ]
df <- df[, -c(2, 3)]
names(df)[1] <- "county_id"
replace_dict <- c(
  'Algodão em caroço (herbáceo)' = 'cotton',
  'Arroz em casca' = 'rice',
  'Aveia em casca' = 'oats',
  'Cana-de-açúcar' = 'sugarcane',
  'Centeio em grão' = 'rye',
  'Cevada em casca' = 'barley',
  'Feijão em grão (primeira safra)' = 'beans1',
  'Feijão em grão (segunda safra)' = 'beans2',
  'Feijão em grão (terceira safra)' = 'beans3',
  'Feijão verde' = 'beans_green',
  'Girassol (semente)' = 'sunflower',
  'Mandioca (aipim, macaxeira)' = 'cassava',
  'Milho em grão' = 'maize',
  'Milho em espiga (verde)' = 'maize_ear',
  'Soja em grão' = 'soybean',
  'Sorgo em grão' = 'sorghum',
  'Sorgo vassoura' = 'sorghum_bicolor',
  'Trigo em grão' = 'wheat',
  'Trigo preto (mourisco, sarraceno)' = 'wheat_black',
  'Alfafa forrageira' = 'alfalfa_forage',
  'Cana forrageira' = 'sugarcane_forage',
  'Milho forrageiro' = 'maize_forage',
  'Outras forrageiras-capins e leguminosas' = 'other_forage',
  'Palma forrageira' = 'palm_forage',
  'Sorgo forrageiro' = 'sorghum_forage',
  'Sementes de algodão' = 'cotton_seeds',
  'Sementes de arroz' = 'rice_seeds',
  'Sementes de feijão' = 'beans_seeds',
  'Sementes de milho' = 'maize_seeds',
  'Sementes de soja' = 'soybean_seeds',
  'Sementes de trigo' = 'wheat_seeds',
  'Sementes de forrageiras' = 'forage_seeds'
)
new_header <- as.character(unlist(df[1, ]))
new_header[1] <- "county_id"
matched <- match(new_header, names(replace_dict))
new_header[!is.na(matched)] <- replace_dict[matched[!is.na(matched)]]
df <- df[-1, ]
colnames(df) <- new_header
df$year <- "1995"
df <- df %>% dplyr::select(year, everything())
char_cols <- names(df)[sapply(df, is.character)]
df[char_cols] <- lapply(df[char_cols], function(x) {
  x[x %in% c("X", "-", "...")] <- "0"
  return(x)
})
df[char_cols] <- lapply(df[char_cols], as.numeric)
df_1 <- df %>% dplyr::select(all_of(final_cols))

#2006
df <- read_excel("../../data/landuse/raw/tabela822.xlsx", sheet = 2, .name_repair = "unique_quiet")
df <- df[-nrow(df), ]      
df <- df[-(1:3), ][, -c(2, 3, 4)]
names(df)[1] <- "county_id"
replace_dict <- c(
  'Algodão herbáceo' = 'cotton',
  'Arroz em casca' = 'rice',
  'Aveia branca em grão' = 'oats',
  'Cana-de-açúcar' = 'sugarcane',
  'Centeio em grão' = 'rye',
  'Cevada em casca' = 'barley',
  'Feijão preto em grão' = 'beans_black',
  'Feijão de cor em grão' = 'beans_colored',
  'Feijão fradinho em grão' = 'beans_blackeyed',
  'Feijão verde' = 'beans_green',
  'Girassol (semente)' = 'sunflower',
  'Mandioca (aipim, macaxeira)' = 'cassava',
  'Milho em grão' = 'maize',
  'Soja em grão' = 'soybean',
  'Sorgo em grão' = 'sorghum',
  'Sorgo vassoura' = 'sorghum_bicolor',
  'Trigo em grão' = 'wheat',
  'Trigo preto em grão' = 'wheat_black',
  'Forrageiras para corte' = 'other_forage',
  'Cana forrageira' = 'sugarcane_forage',
  'Milho forrageiro' = 'maize_forage',
  'Sorgo forrageiro' = 'sorghum_forage',
  'Sementes de algodão (produzidas para plantio)' = 'cotton_seeds',
  'Sementes de arroz (produzidas para plantio)' = 'rice_seeds',
  'Sementes de feijão (produzidas para plantio)' = 'beans_seeds',
  'Sementes de milho (produzidas para plantio)' = 'maize_seeds',
  'Sementes de soja (produzidas para plantio)' = 'soybean_seeds',
  'Sementes de trigo (produzidas para plantio)' = 'wheat_seeds',
  'Sementes de forrageiras (produzidas para plantio)' = 'forage_seeds'
)
new_header <- as.character(unlist(df[1, ]))
new_header[1] <- "county_id"
matched <- match(new_header, names(replace_dict))
new_header[!is.na(matched)] <- replace_dict[matched[!is.na(matched)]]
df <- df[-(1:3), ]
colnames(df) <- new_header
df$year <- "2006"
df <- df %>% dplyr::select(year, everything())
char_cols <- names(df)[sapply(df, is.character)]
df[char_cols] <- lapply(df[char_cols], function(x) {
  x[x %in% c("X", "-", "...")] <- "0"
  return(x)
})
df[char_cols] <- lapply(df[char_cols], as.numeric)
df_2 <- df %>% dplyr::select(all_of(final_cols))

#2017
df <- read_excel("../../data/landuse/raw/tabela6957.xlsx", sheet = 2, .name_repair = "unique_quiet")
df <- df[-nrow(df), ]
df <- df[-(1:4), ][, -c(2, 3, 4)]
names(df)[1] <- "county_id"
replace_dict <- c(
  'Algodão herbáceo' = 'cotton',
  'Arroz em casca' = 'rice',
  'Aveia branca em grão' = 'oats',
  'Cana-de-açúcar' = 'sugarcane',
  'Centeio em grão' = 'rye',
  'Cevada em casca' = 'barley',
  'Feijão preto em grão' = 'beans_black',
  'Feijão de cor em grão' = 'beans_colored',
  'Feijão fradinho em grão' = 'beans_blackeyed',
  'Feijão verde' = 'beans_green',
  'Girassol (semente)' = 'sunflower',
  'Mandioca (aipim, macaxeira)' = 'cassava',
  'Milho em grão' = 'maize',
  'Soja em grão' = 'soybean',
  'Sorgo em grão' = 'sorghum',
  'Sorgo vassoura' = 'sorghum_bicolor',
  'Trigo em grão' = 'wheat',
  'Trigo preto em grão' = 'wheat_black',
  'Forrageiras para corte' = 'other_forage',
  'Cana forrageira' = 'sugarcane_forage',
  'Milho forrageiro' = 'maize_forage',
  'Palma forrageira' = 'palm_forage',
  'Sorgo forrageiro' = 'sorghum_forage',
  'Sementes de algodão (produzidas para plantio)' = 'cotton_seeds',
  'Sementes de arroz (produzidas para plantio)' = 'rice_seeds',
  'Sementes de feijão (produzidas para plantio)' = 'beans_seeds',
  'Sementes de milho (produzidas para plantio)' = 'maize_seeds',
  'Sementes de soja (produzidas para plantio)' = 'soybean_seeds',
  'Sementes de trigo (produzidas para plantio)' = 'wheat_seeds',
  'Sementes de forrageiras (produzidas para plantio)' = 'forage_seeds'
)
new_header <- as.character(unlist(df[1, ]))
new_header[1] <- "county_id"
matched <- match(new_header, names(replace_dict))
new_header[!is.na(matched)] <- replace_dict[matched[!is.na(matched)]]
df <- df[-1, ]
colnames(df) <- new_header
df$year <- "2017"
df <- df %>% dplyr::select(year, everything())
char_cols <- names(df)[sapply(df, is.character)]
df[char_cols] <- lapply(df[char_cols], function(x) {
  x[x %in% c("X", "-", "...")] <- "0"
  return(x)
})
df[char_cols] <- lapply(df[char_cols], as.numeric)
df_3 <- df %>% dplyr::select(all_of(final_cols))

#Bind
df_crops <- bind_rows(df_1, df_2, df_3)
df_crops$county_id <- paste0("BR", as.character(df_crops$county_id))

#Merge
df <- merge(df_landuse, df_crops, by = c("year", "county_id"), all = TRUE)
df$crops_total <- rowSums(df[, c("soybean", "maize", "sugarcane", "wheat", "rice","cotton", "oats", "rye", "barley", "sorghum", "sunflower")], na.rm = TRUE)
df$crops_seasonal2 <- pmax(df$crops_seasonal, df$crops_total, na.rm = TRUE)
df$crops_total <- NULL
df[is.na(df)] <- 0
write.csv(df, gzfile("../../data/landuse/clean/landuse_brazil_county.csv.gz"), row.names = FALSE)



############################################ Argentina

####### Landuse
final_cols <- c("year", "county_id",
                "crops_seasonal", "crops_permanent", "forage_seasonal", "forage_permanent", "crops_unaccounted",
                "pasture", "forest_planted", "forest_natural", "not_used", "not_usable","trails_and_parks", "unaccounted")
province_list <- c("buenosaires", "catamarca", "chaco", "chubut", "cordoba", "corrientes", "entrerios",
                   "formosa", "jujuy", "lapampa", "larioja", "mendoza", "misiones", "neuquen", "rionegro",
                   "salta", "sanjuan", "sanluis", "santacruz", "santafe", "santiagodelestero",
                   "tierradelfuego", "tucuman")
province_name <- c("Buenos Aires", "Catamarca", "Chaco", "Chubut", "Cordoba", "Corrientes", "Entre Rios",
                   "Formosa", "Jujuy", "La Pampa", "La Rioja", "Mendoza", "Misiones", "Neuquen", "Rio Negro",
                   "Salta", "San Juan", "San Luis", "Santa Cruz", "Santa Fe", "Santiago del Estero",
                   "Tierra del Fuego", "Tucuman")

#2002
df_2002 <- NULL
for (n_p in seq_along(province_list)){
  
  df <- read_excel(paste0('../../data/landuse/raw/cna2002/', province_list[n_p], '.xls'), sheet = 1, .name_repair = "unique_quiet")
  colnames(df) <- c('county', 'total_planted_and_nonplanted', 'total_planted',
                    'crops_seasonal', 'crops_permanent', 'space1',
                    'forage_seasonal', 'forage_permanent', 'forest_planted', 'crops_unaccounted','space2',
                    'total_nonplanted', 'pasture', 'forest_natural', 'not_used', 'not_usable','trails_and_parks', 'unaccounted')
  df <- df %>% dplyr::select(-space1, -space2)
  df <- df[-(1:8), ]
  df <- head(df, -5)
  df[df == "-"] <- "0"
  df$year <- "2002"
  df$state <- province_name[n_p]
  df <- df %>% relocate(year, state, .before = county)
  df <- df %>% mutate(across(c(county, state), ~ toupper(.))) %>%
    mutate(across(c(county, state), ~ str_trim(.))) %>%
    mutate(across(c(county, state), ~ stri_trans_general(., "Latin-ASCII")))
  df <- df %>% mutate_at(c('total_planted_and_nonplanted', 'total_planted',
                           'crops_seasonal', 'crops_permanent',
                           'forage_seasonal', 'forage_permanent', 'forest_planted', 'crops_unaccounted',
                           'total_nonplanted', 'pasture', 'forest_natural', 'not_used', 'not_usable',
                           'trails_and_parks', 'unaccounted'), as.numeric)
  if(n_p == 0){df_2002 <- df} else {df_2002 <- bind_rows(df_2002, df)}
}


#2008
df_2008 <- NULL
for (n_p in seq_along(province_list)) {
  
  filepath <- paste0("../../data/landuse/raw/cna2008/", province_list[n_p], ".xls")
  df <- read_excel(filepath, sheet = 6, .name_repair = "unique_quiet")  
  colnames(df) <- c('county', 
                    'total_planted_and_nonplanted', 'total_planted',
                    'crops_seasonal', 'crops_permanent',
                    'forage_seasonal', 'forage_permanent',
                    'forest_planted', 'crops_unaccounted',
                    'total_nonplanted', 'pasture', 'forest_natural',
                    'not_used', 'not_usable', 'trails_and_parks', 'unaccounted')
  df <- df[-(1:8), ] 
  df <- head(df, -1)
  df[df == "-"] <- "0"            
  df$year <- "2008"
  df$state <- province_name[n_p]
  df <- df %>% dplyr::select(year, state, everything())
  df <- df %>% mutate(across(c(county, state), ~ toupper(.))) %>%
    mutate(across(c(county, state), ~ stri_trim_both(.))) %>%
    mutate(across(c(county, state), ~ stri_trans_general(., "Latin-ASCII")))
  df <- df %>% mutate_at(c('total_planted_and_nonplanted', 'total_planted',
                           'crops_seasonal', 'crops_permanent',
                           'forage_seasonal', 'forage_permanent', 
                           'forest_planted', 'crops_unaccounted',
                           'total_nonplanted', 'pasture', 'forest_natural', 
                           'not_used', 'not_usable','trails_and_parks', 'unaccounted'), as.numeric)
  if (is.null(df_2008)) {df_2008 <- df} else {df_2008 <- bind_rows(df_2008, df)}
}

#2018
df_2018 <- NULL
for (n_p in seq_len(23)) {
  sheet_num <- as.character(3.4 + n_p)
  df <- read_excel(paste0("../../data/landuse/raw/cna2018/CNA18_C_3_4.xlsx"), sheet = paste0("3.4.", n_p), .name_repair = "unique_quiet")
  colnames(df) <- c('space1', 'county',
                    'total_planted_and_nonplanted', 'total_planted',
                    'crops_seasonal', 'crops_permanent',
                    'forage_seasonal', 'forage_permanent', 'forest_planted', 'crops_unaccounted',
                    'space2', 'total_nonplanted', 'pasture', 'forest_natural',
                    'not_used', 'not_usable', 'trails_and_parks', 'space3', 'unaccounted')
  df <- df %>% dplyr::select(-space1, -space2, -space3)
  df <- df[-(1:8), ]
  df <- head(df, -4)
  df[df == "-"] <- "0"
  df$year <- "2018"
  df$state <- province_name[n_p]
  df <- df %>% dplyr::select(year, state, everything())
  df <- df %>% mutate(across(c(county, state), toupper)) %>%
    mutate(across(c(county, state), str_trim)) %>%
    mutate(across(c(county, state), ~stringi::stri_trans_general(., "Latin-ASCII")))
  df <- df %>% mutate_at(c('total_planted_and_nonplanted', 'total_planted',
                           'crops_seasonal', 'crops_permanent',
                           'forage_seasonal', 'forage_permanent', 'forest_planted', 'crops_unaccounted',
                           'total_nonplanted', 'pasture', 'forest_natural', 
                           'not_used', 'not_usable','trails_and_parks', 'unaccounted'), as.numeric)
  if (is.null(df_2018)) {df_2018 <- df} else {df_2018 <- bind_rows(df_2018, df)}
}

#Merge
df <- bind_rows(df_2002, df_2008, df_2018)
df$county <- str_replace_all(
  df$county,
  c("1 DE MAYO" = "PRIMERO DE MAYO",
    "1° DE MAYO" = "PRIMERO DE MAYO",
    "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
    "2 DE ABRIL" = "DOS DE ABRIL",
    "25 DE MAYO" = "VEINTICINCO DE MAYO",
    "9 DE JULIO" = "NUEVE DE JULIO",
    "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
    "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
    "CORONEL DE MARINA LEONARDO ROSALES" = "CORONEL ROSALES",
    "CONCEPCION DE LA SIERRA" = "CONCEPCION",
    "FLORENTINO AMEGHINO" = "AMEGHINO",
    "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
    "LDOR GRAL SAN MARTIN" = "LIBERTADOR GENERAL SAN MARTIN",
    "LEANDRO N ALEM" = "LEANDRO N. ALEM",
    "GRAL BELGRANO" = "GENERAL MANUEL BELGRANO",
    "LA CANDELARIA" = "CANDELARIA",
    "DR MANUEL BELGRANO" = "CAPITAL",
    "LA CAPITAL" = "CAPITAL"))
df_1 <- df
df_2 <- read_csv("../../data/landuse/clean/geographicunits_argentina.csv.gz",show_col_types = FALSE)
df <- inner_join(df_2, df_1, by = c("county", "state"), keep = TRUE)
df_landuse <- df %>% dplyr::select(all_of(final_cols))


####### Cropland
final_cols <- c("year", "county_id", "soybean", "maize", "sugarcane", "wheat", "rice", "cotton", "oats", "rye", "barley", "sorghum", "sunflower")
crop_list <- c("soybean", "maize", "wheat", "rice", "cotton", "oats", "rye", "sorghum", "sunflower")
df <- read_csv("../../data/landuse/clean/cropland_argentina_county.csv.gz",show_col_types = FALSE)
df <- df %>% filter(crop %in% crop_list, year %in% c(2002, 2008, 2018)) %>%
  dplyr::select(year, county_id, area_planted_ha, crop)
df <- df %>% filter(!is.na(county_id) & !str_detect(county_id, "c\\("))
df_crops <- df %>% pivot_wider( id_cols = c(year, county_id), names_from = crop,values_from = area_planted_ha) %>% arrange(year, county_id)
df_crops <- df_crops %>% mutate(year = as.character(year))

#Merge
df <- full_join(df_landuse, df_crops, by = c("year", "county_id"))
df <- df %>% mutate(crops_total = rowSums(across(c(cotton, maize, oats, rice, rye, sorghum, soybean, sunflower, wheat)), na.rm = TRUE),
                    crops_seasonal2 = pmax(crops_seasonal, crops_total, na.rm = TRUE)) %>% dplyr::select(-crops_total)
df <- df %>% mutate(across(where(is.numeric), ~replace_na(., 0)))
write.csv(df, gzfile("../../data/landuse/clean/landuse_argentina_county.csv.gz"), row.names = FALSE)
